├── .gitignore ├── LICENSE ├── README.md ├── assets ├── logo.png ├── shikra_case_1.jpg └── teaser.jpg ├── config ├── _base_ │ ├── dataset │ │ ├── DEFAULT_TEST_CLEVR_VARIANT.py │ │ ├── DEFAULT_TEST_DATASET.py │ │ ├── DEFAULT_TEST_FLICKR_VARIANT.py │ │ ├── DEFAULT_TEST_GPTGEN_VARIANT.py │ │ ├── DEFAULT_TEST_GQA_VARIANT.py │ │ ├── DEFAULT_TEST_POINT_VARIANT.py │ │ ├── DEFAULT_TEST_POPE_VARIANT.py │ │ ├── DEFAULT_TEST_REC_VARIANT.py │ │ ├── DEFAULT_TEST_RES_VARIANT.py │ │ ├── DEFAULT_TEST_VCR_VARIANT.py │ │ ├── DEFAULT_TEST_VQAv2_VARIANT.py │ │ ├── DEFAULT_TRAIN_CLEVR_VARIANT.py │ │ ├── DEFAULT_TRAIN_DATASET.py │ │ ├── DEFAULT_TRAIN_DATASET_debug.py │ │ ├── DEFAULT_TRAIN_GPTGENMASK_VARIANT.py │ │ ├── DEFAULT_TRAIN_GPTGEN_VARIANT.py │ │ ├── DEFAULT_TRAIN_GQA_VARIANT.py │ │ ├── DEFAULT_TRAIN_POINT_VARIANT.py │ │ ├── DEFAULT_TRAIN_VCR_VARIANT.py │ │ ├── DEFAULT_TRAIN_VQAEX_VARIANT.py │ │ ├── DEFAULT_TRAIN_VQAv2_VARIANT.py │ │ ├── mix_pretrain_concat3.py │ │ ├── mix_pretrain_concat8.py │ │ ├── mix_pretrain_final19.py │ │ ├── mix_pretrain_final55.py │ │ ├── mix_pretrain_final_rec+vg.py │ │ ├── mix_pretrain_final_rec.py │ │ └── template │ │ │ ├── GC.json │ │ │ ├── GC_bak.json │ │ │ ├── GC_mask.json │ │ │ ├── InstanceSeg.json │ │ │ ├── REC.json │ │ │ ├── REC_bak.json │ │ │ ├── REC_img_exp.json │ │ │ ├── REG.json │ │ │ ├── RES.json │ │ │ ├── VQA.json │ │ │ ├── VQA_BCoT.json │ │ │ ├── VQA_BCoT_mask.json │ │ │ ├── VQA_CoT.json │ │ │ ├── VQA_PCoT.json │ │ │ ├── box2seg.json │ │ │ ├── flickr30k.json │ │ │ ├── flickr30k_bak.json │ │ │ ├── flickr30k_mask.json │ │ │ ├── flickr30k_mask_back.json │ │ │ └── image_cap.json │ ├── model │ │ ├── shikra.py │ │ ├── shikra_13b.py │ │ ├── shikra_13bo.py │ │ ├── shikra_7b.py │ │ └── shikra_7bo.py │ └── train │ │ ├── eval.py │ │ ├── shikra.py │ │ ├── shikra_deepspeed_lora.py │ │ └── shikra_fsdp.py └── training_configs │ └── shikra3_rec3_mask_box_cls_refcoco_all.py ├── deepspeed ├── ds_config_zero2.json ├── ds_config_zero2_offload.json └── ds_config_zero3.json ├── docs └── data.md ├── mllm ├── __init__.py ├── config │ ├── __init__.py │ └── config.py ├── conversation │ ├── __init__.py │ └── base_conversation.py ├── dataset │ ├── __init__.py │ ├── builder.py │ ├── process_function │ │ ├── __init__.py │ │ ├── box_process_function.py │ │ └── shikra_process_function.py │ ├── root.py │ ├── single_image_convsation.py │ ├── single_image_dataset │ │ ├── __init__.py │ │ ├── box2seg.py │ │ ├── caption.py │ │ ├── clevr.py │ │ ├── flickr.py │ │ ├── flickr_mask.py │ │ ├── gpt_gen.py │ │ ├── gpt_gen_mask.py │ │ ├── gqa.py │ │ ├── gref.py │ │ ├── instance_seg.py │ │ ├── instr.py │ │ ├── point_qa.py │ │ ├── pope.py │ │ ├── rec.py │ │ ├── ref_mask.py │ │ ├── ref_mask_refcoco.py │ │ ├── ref_mask_vg.py │ │ ├── reg.py │ │ ├── reg_mask.py │ │ ├── vcr.py │ │ ├── vqaex.py │ │ └── vqav2.py │ ├── single_image_interactive.py │ └── utils │ │ ├── __init__.py │ │ ├── compute_metrics.py │ │ ├── concatenate_dataset.py │ │ ├── flickr30k_entities_utils.py │ │ ├── io.py │ │ ├── mixin.py │ │ └── transform.py ├── demo │ ├── __init__.py │ ├── assets │ │ ├── DejaVuSansMono.ttf │ │ ├── airplane.jpg │ │ ├── ball.jpg │ │ ├── banana_phone.png │ │ ├── baseball.png │ │ ├── bear-792466_1280.jpg │ │ ├── bearhat.png │ │ ├── boxes_seq_explanation.jpg │ │ ├── dog_rabbit.jpg │ │ ├── dog_selfcontrol.jpg │ │ ├── fishing.jpg │ │ ├── food-1898194_640.jpg │ │ ├── fruits.jpg │ │ ├── g2.jpg │ │ ├── giraffes.jpg │ │ ├── logo.png │ │ ├── man.jpg │ │ ├── oven.jpg │ │ ├── petal_20230711_153216_Compressed.mp4 │ │ ├── potato.jpg │ │ ├── proposal.jpg │ │ ├── puzzle.jpg │ │ ├── rec_bear.png │ │ ├── staircase-274614_640.jpg │ │ ├── water_question.jpg │ │ ├── wet_paint1.jpg │ │ └── woman_door.jpg │ ├── client.py │ ├── server.py │ ├── temp │ │ └── tmpf83m4lbn.jpg │ └── webdemo.py ├── engine │ ├── __init__.py │ ├── base_engine.py │ ├── builder.py │ ├── perception_trainer.py │ └── shikra.py ├── header.py ├── models │ ├── ImageBind │ │ ├── CODE_OF_CONDUCT.md │ │ ├── CONTRIBUTING.md │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── bpe │ │ │ └── bpe_simple_vocab_16e6.txt.gz │ │ ├── data.py │ │ ├── model_card.md │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── helpers.py │ │ │ ├── imagebind_model.py │ │ │ ├── multimodal_preprocessors.py │ │ │ └── transformer.py │ │ └── requirements.txt │ ├── __init__.py │ ├── autoencoder │ │ ├── __init__.py │ │ ├── blocks │ │ │ ├── __init__.py │ │ │ ├── decoder_layer.py │ │ │ └── encoder_layer.py │ │ ├── embedding │ │ │ ├── __init__.py │ │ │ ├── positional_encoding.py │ │ │ ├── token_embeddings.py │ │ │ └── transformer_embedding.py │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── layer_norm.py │ │ │ ├── multi_head_attention.py │ │ │ ├── position_wise_feed_forward.py │ │ │ └── scale_dot_product_attention.py │ │ └── model │ │ │ ├── __init__.py │ │ │ ├── conv_encoder_decoder.py │ │ │ ├── decoder.py │ │ │ ├── encoder.py │ │ │ ├── resnet.py │ │ │ ├── resnet_back.py │ │ │ ├── resnet_layernorm.py │ │ │ ├── resnet_layernorm_4x4.py │ │ │ ├── resnet_layernorm_spatial.py │ │ │ ├── resnet_upsamplr.py │ │ │ ├── transformer.py │ │ │ ├── transformer_codebook.py │ │ │ ├── transformer_discrete.py │ │ │ ├── transformer_lstm.py │ │ │ ├── transformer_lstm_codebook.py │ │ │ ├── transformer_mask.py │ │ │ └── transformer_mlp.py │ ├── builder │ │ ├── __init__.py │ │ ├── build_perceptionGPT.py │ │ └── builder.py │ ├── enhancer │ │ ├── __init__.py │ │ ├── common.py │ │ ├── sam.py │ │ └── transformer.py │ ├── perceptionGPT │ │ ├── __init__.py │ │ ├── apply_delta.py │ │ ├── make_delta.py │ │ ├── mask_decoder.py │ │ ├── peft_for_shikra.py │ │ └── perceptionGPT.py │ ├── utils │ │ ├── __init__.py │ │ └── modeling_outputs.py │ └── vision_towers │ │ ├── __init__.py │ │ ├── clip_custom.py │ │ └── clip_custom_bak.py ├── pipeline │ ├── __init__.py │ └── finetune.py └── utils │ ├── __init__.py │ ├── box_utils.py │ ├── common.py │ ├── dice_loss.py │ ├── llama_flash_attn_monkey_patch.py │ ├── mask_utils.py │ └── utils.py ├── prepare_data ├── data_process_gref.py ├── data_process_shikra.py ├── grefer.py ├── prepare.sh ├── refcoco_collect_info.py └── refer.py ├── requirements.txt └── scripts └── run.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/README.md -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/assets/logo.png -------------------------------------------------------------------------------- /assets/shikra_case_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/assets/shikra_case_1.jpg -------------------------------------------------------------------------------- /assets/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/assets/teaser.jpg -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TEST_CLEVR_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TEST_CLEVR_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TEST_DATASET.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TEST_DATASET.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TEST_FLICKR_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TEST_FLICKR_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TEST_GPTGEN_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TEST_GPTGEN_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TEST_GQA_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TEST_GQA_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TEST_POINT_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TEST_POINT_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TEST_POPE_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TEST_POPE_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TEST_REC_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TEST_REC_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TEST_RES_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TEST_RES_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TEST_VCR_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TEST_VCR_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TEST_VQAv2_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TEST_VQAv2_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TRAIN_CLEVR_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TRAIN_CLEVR_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TRAIN_DATASET.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TRAIN_DATASET.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TRAIN_DATASET_debug.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TRAIN_DATASET_debug.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TRAIN_GPTGENMASK_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TRAIN_GPTGENMASK_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TRAIN_GPTGEN_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TRAIN_GPTGEN_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TRAIN_GQA_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TRAIN_GQA_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TRAIN_POINT_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TRAIN_POINT_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TRAIN_VCR_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TRAIN_VCR_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TRAIN_VQAEX_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TRAIN_VQAEX_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/DEFAULT_TRAIN_VQAv2_VARIANT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/DEFAULT_TRAIN_VQAv2_VARIANT.py -------------------------------------------------------------------------------- /config/_base_/dataset/mix_pretrain_concat3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/mix_pretrain_concat3.py -------------------------------------------------------------------------------- /config/_base_/dataset/mix_pretrain_concat8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/mix_pretrain_concat8.py -------------------------------------------------------------------------------- /config/_base_/dataset/mix_pretrain_final19.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/mix_pretrain_final19.py -------------------------------------------------------------------------------- /config/_base_/dataset/mix_pretrain_final55.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/mix_pretrain_final55.py -------------------------------------------------------------------------------- /config/_base_/dataset/mix_pretrain_final_rec+vg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/mix_pretrain_final_rec+vg.py -------------------------------------------------------------------------------- /config/_base_/dataset/mix_pretrain_final_rec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/mix_pretrain_final_rec.py -------------------------------------------------------------------------------- /config/_base_/dataset/template/GC.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/GC.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/GC_bak.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/GC_bak.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/GC_mask.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/GC_mask.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/InstanceSeg.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/InstanceSeg.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/REC.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/REC.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/REC_bak.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/REC_bak.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/REC_img_exp.json: -------------------------------------------------------------------------------- 1 | [ 2 | " " 3 | ] -------------------------------------------------------------------------------- /config/_base_/dataset/template/REG.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/REG.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/RES.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/RES.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/VQA.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/VQA.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/VQA_BCoT.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/VQA_BCoT.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/VQA_BCoT_mask.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/VQA_BCoT_mask.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/VQA_CoT.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/VQA_CoT.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/VQA_PCoT.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/VQA_PCoT.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/box2seg.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/box2seg.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/flickr30k.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/flickr30k.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/flickr30k_bak.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/flickr30k_bak.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/flickr30k_mask.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/flickr30k_mask.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/flickr30k_mask_back.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/flickr30k_mask_back.json -------------------------------------------------------------------------------- /config/_base_/dataset/template/image_cap.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/dataset/template/image_cap.json -------------------------------------------------------------------------------- /config/_base_/model/shikra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/model/shikra.py -------------------------------------------------------------------------------- /config/_base_/model/shikra_13b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/model/shikra_13b.py -------------------------------------------------------------------------------- /config/_base_/model/shikra_13bo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/model/shikra_13bo.py -------------------------------------------------------------------------------- /config/_base_/model/shikra_7b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/model/shikra_7b.py -------------------------------------------------------------------------------- /config/_base_/model/shikra_7bo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/model/shikra_7bo.py -------------------------------------------------------------------------------- /config/_base_/train/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/train/eval.py -------------------------------------------------------------------------------- /config/_base_/train/shikra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/train/shikra.py -------------------------------------------------------------------------------- /config/_base_/train/shikra_deepspeed_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/train/shikra_deepspeed_lora.py -------------------------------------------------------------------------------- /config/_base_/train/shikra_fsdp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/_base_/train/shikra_fsdp.py -------------------------------------------------------------------------------- /config/training_configs/shikra3_rec3_mask_box_cls_refcoco_all.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/config/training_configs/shikra3_rec3_mask_box_cls_refcoco_all.py -------------------------------------------------------------------------------- /deepspeed/ds_config_zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/deepspeed/ds_config_zero2.json -------------------------------------------------------------------------------- /deepspeed/ds_config_zero2_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/deepspeed/ds_config_zero2_offload.json -------------------------------------------------------------------------------- /deepspeed/ds_config_zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/deepspeed/ds_config_zero3.json -------------------------------------------------------------------------------- /docs/data.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/docs/data.md -------------------------------------------------------------------------------- /mllm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mllm/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import prepare_args 2 | -------------------------------------------------------------------------------- /mllm/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/config/config.py -------------------------------------------------------------------------------- /mllm/conversation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/conversation/__init__.py -------------------------------------------------------------------------------- /mllm/conversation/base_conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/conversation/base_conversation.py -------------------------------------------------------------------------------- /mllm/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/__init__.py -------------------------------------------------------------------------------- /mllm/dataset/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/builder.py -------------------------------------------------------------------------------- /mllm/dataset/process_function/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/process_function/__init__.py -------------------------------------------------------------------------------- /mllm/dataset/process_function/box_process_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/process_function/box_process_function.py -------------------------------------------------------------------------------- /mllm/dataset/process_function/shikra_process_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/process_function/shikra_process_function.py -------------------------------------------------------------------------------- /mllm/dataset/root.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/root.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_convsation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_convsation.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/__init__.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/box2seg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/box2seg.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/caption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/caption.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/clevr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/clevr.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/flickr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/flickr.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/flickr_mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/flickr_mask.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/gpt_gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/gpt_gen.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/gpt_gen_mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/gpt_gen_mask.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/gqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/gqa.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/gref.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/gref.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/instance_seg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/instance_seg.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/instr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/instr.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/point_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/point_qa.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/pope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/pope.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/rec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/rec.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/ref_mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/ref_mask.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/ref_mask_refcoco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/ref_mask_refcoco.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/ref_mask_vg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/ref_mask_vg.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/reg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/reg.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/reg_mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/reg_mask.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/vcr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/vcr.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/vqaex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/vqaex.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_dataset/vqav2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_dataset/vqav2.py -------------------------------------------------------------------------------- /mllm/dataset/single_image_interactive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/single_image_interactive.py -------------------------------------------------------------------------------- /mllm/dataset/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/utils/__init__.py -------------------------------------------------------------------------------- /mllm/dataset/utils/compute_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/utils/compute_metrics.py -------------------------------------------------------------------------------- /mllm/dataset/utils/concatenate_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/utils/concatenate_dataset.py -------------------------------------------------------------------------------- /mllm/dataset/utils/flickr30k_entities_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/utils/flickr30k_entities_utils.py -------------------------------------------------------------------------------- /mllm/dataset/utils/io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/utils/io.py -------------------------------------------------------------------------------- /mllm/dataset/utils/mixin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/utils/mixin.py -------------------------------------------------------------------------------- /mllm/dataset/utils/transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/dataset/utils/transform.py -------------------------------------------------------------------------------- /mllm/demo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mllm/demo/assets/DejaVuSansMono.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/DejaVuSansMono.ttf -------------------------------------------------------------------------------- /mllm/demo/assets/airplane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/airplane.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/ball.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/ball.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/banana_phone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/banana_phone.png -------------------------------------------------------------------------------- /mllm/demo/assets/baseball.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/baseball.png -------------------------------------------------------------------------------- /mllm/demo/assets/bear-792466_1280.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/bear-792466_1280.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/bearhat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/bearhat.png -------------------------------------------------------------------------------- /mllm/demo/assets/boxes_seq_explanation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/boxes_seq_explanation.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/dog_rabbit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/dog_rabbit.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/dog_selfcontrol.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/dog_selfcontrol.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/fishing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/fishing.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/food-1898194_640.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/food-1898194_640.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/fruits.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/fruits.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/g2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/g2.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/giraffes.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/giraffes.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/logo.png -------------------------------------------------------------------------------- /mllm/demo/assets/man.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/man.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/oven.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/oven.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/petal_20230711_153216_Compressed.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/petal_20230711_153216_Compressed.mp4 -------------------------------------------------------------------------------- /mllm/demo/assets/potato.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/potato.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/proposal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/proposal.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/puzzle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/puzzle.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/rec_bear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/rec_bear.png -------------------------------------------------------------------------------- /mllm/demo/assets/staircase-274614_640.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/staircase-274614_640.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/water_question.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/water_question.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/wet_paint1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/wet_paint1.jpg -------------------------------------------------------------------------------- /mllm/demo/assets/woman_door.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/assets/woman_door.jpg -------------------------------------------------------------------------------- /mllm/demo/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/client.py -------------------------------------------------------------------------------- /mllm/demo/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/server.py -------------------------------------------------------------------------------- /mllm/demo/temp/tmpf83m4lbn.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/temp/tmpf83m4lbn.jpg -------------------------------------------------------------------------------- /mllm/demo/webdemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/demo/webdemo.py -------------------------------------------------------------------------------- /mllm/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/engine/__init__.py -------------------------------------------------------------------------------- /mllm/engine/base_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/engine/base_engine.py -------------------------------------------------------------------------------- /mllm/engine/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/engine/builder.py -------------------------------------------------------------------------------- /mllm/engine/perception_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/engine/perception_trainer.py -------------------------------------------------------------------------------- /mllm/engine/shikra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/engine/shikra.py -------------------------------------------------------------------------------- /mllm/header.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/header.py -------------------------------------------------------------------------------- /mllm/models/ImageBind/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/ImageBind/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /mllm/models/ImageBind/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/ImageBind/CONTRIBUTING.md -------------------------------------------------------------------------------- /mllm/models/ImageBind/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/ImageBind/LICENSE -------------------------------------------------------------------------------- /mllm/models/ImageBind/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/ImageBind/README.md -------------------------------------------------------------------------------- /mllm/models/ImageBind/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/ImageBind/__init__.py -------------------------------------------------------------------------------- /mllm/models/ImageBind/bpe/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/ImageBind/bpe/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /mllm/models/ImageBind/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/ImageBind/data.py -------------------------------------------------------------------------------- /mllm/models/ImageBind/model_card.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/ImageBind/model_card.md -------------------------------------------------------------------------------- /mllm/models/ImageBind/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mllm/models/ImageBind/models/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/ImageBind/models/helpers.py -------------------------------------------------------------------------------- /mllm/models/ImageBind/models/imagebind_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/ImageBind/models/imagebind_model.py -------------------------------------------------------------------------------- /mllm/models/ImageBind/models/multimodal_preprocessors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/ImageBind/models/multimodal_preprocessors.py -------------------------------------------------------------------------------- /mllm/models/ImageBind/models/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/ImageBind/models/transformer.py -------------------------------------------------------------------------------- /mllm/models/ImageBind/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/ImageBind/requirements.txt -------------------------------------------------------------------------------- /mllm/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/__init__.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/__init__.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/blocks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mllm/models/autoencoder/blocks/decoder_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/blocks/decoder_layer.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/blocks/encoder_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/blocks/encoder_layer.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/embedding/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/embedding/__init__.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/embedding/positional_encoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/embedding/positional_encoding.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/embedding/token_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/embedding/token_embeddings.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/embedding/transformer_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/embedding/transformer_embedding.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/layers/__init__.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/layers/layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/layers/layer_norm.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/layers/multi_head_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/layers/multi_head_attention.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/layers/position_wise_feed_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/layers/position_wise_feed_forward.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/layers/scale_dot_product_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/layers/scale_dot_product_attention.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/__init__.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/conv_encoder_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/conv_encoder_decoder.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/decoder.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/encoder.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/resnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/resnet.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/resnet_back.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/resnet_back.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/resnet_layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/resnet_layernorm.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/resnet_layernorm_4x4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/resnet_layernorm_4x4.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/resnet_layernorm_spatial.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/resnet_layernorm_spatial.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/resnet_upsamplr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/resnet_upsamplr.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/transformer.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/transformer_codebook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/transformer_codebook.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/transformer_discrete.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/transformer_discrete.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/transformer_lstm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/transformer_lstm.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/transformer_lstm_codebook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/transformer_lstm_codebook.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/transformer_mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/transformer_mask.py -------------------------------------------------------------------------------- /mllm/models/autoencoder/model/transformer_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/autoencoder/model/transformer_mlp.py -------------------------------------------------------------------------------- /mllm/models/builder/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import load_pretrained 2 | -------------------------------------------------------------------------------- /mllm/models/builder/build_perceptionGPT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/builder/build_perceptionGPT.py -------------------------------------------------------------------------------- /mllm/models/builder/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/builder/builder.py -------------------------------------------------------------------------------- /mllm/models/enhancer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mllm/models/enhancer/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/enhancer/common.py -------------------------------------------------------------------------------- /mllm/models/enhancer/sam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/enhancer/sam.py -------------------------------------------------------------------------------- /mllm/models/enhancer/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/enhancer/transformer.py -------------------------------------------------------------------------------- /mllm/models/perceptionGPT/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/perceptionGPT/__init__.py -------------------------------------------------------------------------------- /mllm/models/perceptionGPT/apply_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/perceptionGPT/apply_delta.py -------------------------------------------------------------------------------- /mllm/models/perceptionGPT/make_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/perceptionGPT/make_delta.py -------------------------------------------------------------------------------- /mllm/models/perceptionGPT/mask_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/perceptionGPT/mask_decoder.py -------------------------------------------------------------------------------- /mllm/models/perceptionGPT/peft_for_shikra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/perceptionGPT/peft_for_shikra.py -------------------------------------------------------------------------------- /mllm/models/perceptionGPT/perceptionGPT.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/perceptionGPT/perceptionGPT.py -------------------------------------------------------------------------------- /mllm/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mllm/models/utils/modeling_outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/utils/modeling_outputs.py -------------------------------------------------------------------------------- /mllm/models/vision_towers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mllm/models/vision_towers/clip_custom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/vision_towers/clip_custom.py -------------------------------------------------------------------------------- /mllm/models/vision_towers/clip_custom_bak.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/models/vision_towers/clip_custom_bak.py -------------------------------------------------------------------------------- /mllm/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mllm/pipeline/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/pipeline/finetune.py -------------------------------------------------------------------------------- /mllm/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/utils/__init__.py -------------------------------------------------------------------------------- /mllm/utils/box_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/utils/box_utils.py -------------------------------------------------------------------------------- /mllm/utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/utils/common.py -------------------------------------------------------------------------------- /mllm/utils/dice_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/utils/dice_loss.py -------------------------------------------------------------------------------- /mllm/utils/llama_flash_attn_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/utils/llama_flash_attn_monkey_patch.py -------------------------------------------------------------------------------- /mllm/utils/mask_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/utils/mask_utils.py -------------------------------------------------------------------------------- /mllm/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/mllm/utils/utils.py -------------------------------------------------------------------------------- /prepare_data/data_process_gref.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/prepare_data/data_process_gref.py -------------------------------------------------------------------------------- /prepare_data/data_process_shikra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/prepare_data/data_process_shikra.py -------------------------------------------------------------------------------- /prepare_data/grefer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/prepare_data/grefer.py -------------------------------------------------------------------------------- /prepare_data/prepare.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/prepare_data/prepare.sh -------------------------------------------------------------------------------- /prepare_data/refcoco_collect_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/prepare_data/refcoco_collect_info.py -------------------------------------------------------------------------------- /prepare_data/refer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/prepare_data/refer.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pipilurj/perceptionGPT/HEAD/scripts/run.sh --------------------------------------------------------------------------------