├── .gitignore ├── .gitmodules ├── README.md ├── XMem ├── dataset │ ├── __init__.py │ ├── range_transform.py │ ├── reseed.py │ ├── static_dataset.py │ ├── tps.py │ ├── util.py │ └── vos_dataset.py ├── eval.py ├── eval_batch.py ├── generate_xmem_data_single.py ├── inference │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── mask_mapper.py │ │ ├── test_datasets.py │ │ └── video_reader.py │ ├── inference_core.py │ ├── interact │ │ ├── __init__.py │ │ ├── fbrs │ │ │ ├── LICENSE │ │ │ ├── __init__.py │ │ │ ├── controller.py │ │ │ ├── inference │ │ │ │ ├── __init__.py │ │ │ │ ├── clicker.py │ │ │ │ ├── evaluation.py │ │ │ │ ├── predictors │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── brs.py │ │ │ │ │ ├── brs_functors.py │ │ │ │ │ └── brs_losses.py │ │ │ │ ├── transforms │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── crops.py │ │ │ │ │ ├── flip.py │ │ │ │ │ ├── limit_longest_side.py │ │ │ │ │ └── zoom_in.py │ │ │ │ └── utils.py │ │ │ ├── model │ │ │ │ ├── __init__.py │ │ │ │ ├── initializer.py │ │ │ │ ├── is_deeplab_model.py │ │ │ │ ├── is_hrnet_model.py │ │ │ │ ├── losses.py │ │ │ │ ├── metrics.py │ │ │ │ ├── modeling │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── basic_blocks.py │ │ │ │ │ ├── deeplab_v3.py │ │ │ │ │ ├── hrnet_ocr.py │ │ │ │ │ ├── ocr.py │ │ │ │ │ ├── resnet.py │ │ │ │ │ └── resnetv1b.py │ │ │ │ ├── ops.py │ │ │ │ └── syncbn │ │ │ │ │ ├── LICENSE │ │ │ │ │ ├── README.md │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── modules │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── functional │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── _csrc.py │ │ │ │ │ ├── csrc │ │ │ │ │ │ ├── bn.h │ │ │ │ │ │ ├── cuda │ │ │ │ │ │ │ ├── bn_cuda.cu │ │ │ │ │ │ │ ├── common.h │ │ │ │ │ │ │ └── ext_lib.h │ │ │ │ │ │ └── ext_lib.cpp │ │ │ │ │ └── syncbn.py │ │ │ │ │ └── nn │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── syncbn.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── cython │ │ │ │ ├── __init__.py │ │ │ │ ├── _get_dist_maps.pyx │ │ │ │ ├── _get_dist_maps.pyxbld │ │ │ │ └── dist_maps.py │ │ │ │ ├── misc.py │ │ │ │ └── vis.py │ │ ├── fbrs_controller.py │ │ ├── gui.py │ │ ├── gui_utils.py │ │ ├── interaction.py │ │ ├── interactive_utils.py │ │ ├── resource_manager.py │ │ ├── s2m │ │ │ ├── __init__.py │ │ │ ├── _deeplab.py │ │ │ ├── s2m_network.py │ │ │ ├── s2m_resnet.py │ │ │ └── utils.py │ │ ├── s2m_controller.py │ │ └── timer.py │ ├── kv_memory_store.py │ └── memory_manager.py ├── interactive_demo.py ├── merge_multi_scale.py ├── merge_results.py ├── model │ ├── __init__.py │ ├── aggregate.py │ ├── cbam.py │ ├── group_modules.py │ ├── losses.py │ ├── memory_util.py │ ├── modules.py │ ├── network.py │ ├── resnet.py │ └── trainer.py ├── requirements.txt ├── scripts │ ├── __init__.py │ ├── download_bl30k.py │ ├── download_datasets.py │ ├── download_models.sh │ ├── download_models_demo.sh │ ├── expand_long_vid.py │ └── resize_youtube.py ├── tracking.py ├── train.py └── util │ ├── __init__.py │ ├── configuration.py │ ├── davis_subset.txt │ ├── image_saver.py │ ├── load_subset.py │ ├── log_integrator.py │ ├── logger.py │ ├── palette.py │ ├── tensor_util.py │ └── yv_subset.txt ├── assert ├── architecture.png └── performance.png ├── merge_lora_weights_and_save_hf_model.py ├── model ├── VISA.py ├── llava │ ├── __init__.py │ ├── constants.py │ ├── conversation.py │ ├── mm_utils.py │ ├── model │ │ ├── __init__.py │ │ ├── apply_delta.py │ │ ├── builder.py │ │ ├── consolidate.py │ │ ├── language_model │ │ │ ├── llava_llama.py │ │ │ ├── llava_mpt.py │ │ │ └── mpt │ │ │ │ ├── adapt_tokenizer.py │ │ │ │ ├── attention.py │ │ │ │ ├── blocks.py │ │ │ │ ├── configuration_mpt.py │ │ │ │ ├── custom_embedding.py │ │ │ │ ├── flash_attn_triton.py │ │ │ │ ├── hf_prefixlm_converter.py │ │ │ │ ├── meta_init_context.py │ │ │ │ ├── modeling_mpt.py │ │ │ │ ├── norm.py │ │ │ │ └── param_init_fns.py │ │ ├── llava_arch.py │ │ ├── make_delta.py │ │ ├── multimodal_encoder │ │ │ ├── builder.py │ │ │ └── clip_encoder.py │ │ └── utils.py │ ├── train │ │ ├── llama_flash_attn_monkey_patch.py │ │ ├── llava_trainer.py │ │ ├── train.py │ │ └── train_mem.py │ └── utils.py ├── segment_anything │ ├── __init__.py │ ├── automatic_mask_generator.py │ ├── build_sam.py │ ├── modeling │ │ ├── __init__.py │ │ ├── common.py │ │ ├── image_encoder.py │ │ ├── mask_decoder.py │ │ ├── prompt_encoder.py │ │ ├── sam.py │ │ └── transformer.py │ ├── predictor.py │ └── utils │ │ ├── __init__.py │ │ ├── amg.py │ │ ├── onnx.py │ │ └── transforms.py ├── tf │ └── modeling_outputs.py └── univi │ ├── __init__.py │ ├── config │ ├── __init__.py │ ├── dataset_config.py │ └── model_config.py │ ├── constants.py │ ├── conversation.py │ ├── demo.py │ ├── eval │ ├── evaluate │ │ ├── evaluate_benchmark_1_correctness.py │ │ ├── evaluate_benchmark_2_detailed_orientation.py │ │ ├── evaluate_benchmark_3_context.py │ │ ├── evaluate_benchmark_4_temporal.py │ │ ├── evaluate_benchmark_5_consistency.py │ │ ├── evaluate_gpt_review_visual.py │ │ ├── evaluate_science_qa.py │ │ ├── evaluate_video_qa.py │ │ └── summarize_gpt_review.py │ ├── model_coco_vqa.py │ ├── model_video_consistency.py │ ├── model_video_general.py │ ├── model_video_qa.py │ ├── model_vqa.py │ ├── model_vqa_scienceqa.py │ ├── questions │ │ ├── coco2014_val_qa_eval │ │ │ ├── qa90_gpt4_answer.jsonl │ │ │ └── qa90_questions.jsonl │ │ ├── coco_pope │ │ │ ├── coco_pope_adversarial.jsonl │ │ │ ├── coco_pope_popular.jsonl │ │ │ └── coco_pope_random.jsonl │ │ ├── scienceqa │ │ │ ├── pid_splits.json │ │ │ ├── problems.json │ │ │ └── test_QCM-LEA.json │ │ └── video_qa │ │ │ ├── activitynet_a_list.json │ │ │ ├── activitynet_qa.json │ │ │ ├── consistency_qa.json │ │ │ ├── generic_qa.json │ │ │ ├── msrvtt_a_list.json │ │ │ ├── msrvtt_qa.json │ │ │ ├── msvd_a_list.json │ │ │ ├── msvd_qa.json │ │ │ ├── temporal_qa.json │ │ │ ├── tgif_a_list.json │ │ │ └── tgif_qa.json │ └── table │ │ ├── caps_boxes_coco2014_val_80.jsonl │ │ ├── model.jsonl │ │ ├── question.jsonl │ │ ├── reviewer.jsonl │ │ └── rule.json │ ├── mm_utils.py │ ├── model │ ├── __init__.py │ ├── apply_delta.py │ ├── arch.py │ ├── builder.py │ ├── cluster.py │ ├── consolidate.py │ ├── dataloader.py │ ├── language_model │ │ └── llama.py │ ├── make_delta.py │ └── multimodal_encoder │ │ ├── builder.py │ │ ├── clip_encoder.py │ │ ├── eva_encoder.py │ │ ├── eva_vit.py │ │ ├── processor.py │ │ └── utils.py │ ├── train │ ├── llama_flash_attn_monkey_patch.py │ ├── train.py │ ├── train_mem.py │ └── trainer.py │ └── utils.py ├── requirements.txt ├── scripts ├── train_13b.sh ├── train_7b.sh └── val_7b_video.sh ├── tools ├── eval_davis17.py ├── eval_mevis.py ├── eval_revos.py ├── generate_foreground_mask.py ├── metrics.py ├── zip_mp_mevis.py └── zip_mp_refytvos.py ├── train_ds.py ├── utils ├── ade20k_classes.json ├── chatunivi_dataset.py ├── cocostuff_classes.txt ├── conversation.py ├── d2_datasets │ ├── categories.py │ ├── mevis_utils.py │ ├── refytvos_utils.py │ ├── refytvos_val_videos.py │ └── ytvis_api │ │ ├── __init__.py │ │ ├── ytvos.py │ │ └── ytvoseval.py ├── data_processing.py ├── dataset.py ├── dataset_config.py ├── grefcoco.py ├── grefer.py ├── random_list.py ├── reason_seg_dataset.py ├── refer.py ├── refer_seg_dataset.py ├── rvos_dataset.py ├── rvos_eval_dataset.py ├── sem_seg_dataset.py ├── utils.py └── vqa_dataset.py └── utils_llamavid ├── llamavid_client.py └── llamavid_server.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/.gitmodules -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/README.md -------------------------------------------------------------------------------- /XMem/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/dataset/range_transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/dataset/range_transform.py -------------------------------------------------------------------------------- /XMem/dataset/reseed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/dataset/reseed.py -------------------------------------------------------------------------------- /XMem/dataset/static_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/dataset/static_dataset.py -------------------------------------------------------------------------------- /XMem/dataset/tps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/dataset/tps.py -------------------------------------------------------------------------------- /XMem/dataset/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/dataset/util.py -------------------------------------------------------------------------------- /XMem/dataset/vos_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/dataset/vos_dataset.py -------------------------------------------------------------------------------- /XMem/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/eval.py -------------------------------------------------------------------------------- /XMem/eval_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/eval_batch.py -------------------------------------------------------------------------------- /XMem/generate_xmem_data_single.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/generate_xmem_data_single.py -------------------------------------------------------------------------------- /XMem/inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/inference/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/inference/data/mask_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/data/mask_mapper.py -------------------------------------------------------------------------------- /XMem/inference/data/test_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/data/test_datasets.py -------------------------------------------------------------------------------- /XMem/inference/data/video_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/data/video_reader.py -------------------------------------------------------------------------------- /XMem/inference/inference_core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/inference_core.py -------------------------------------------------------------------------------- /XMem/inference/interact/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/LICENSE -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/controller.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/clicker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/clicker.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/evaluation.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/predictors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/predictors/__init__.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/predictors/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/predictors/base.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/predictors/brs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/predictors/brs.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/predictors/brs_functors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/predictors/brs_functors.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/predictors/brs_losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/predictors/brs_losses.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/transforms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/transforms/__init__.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/transforms/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/transforms/base.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/transforms/crops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/transforms/crops.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/transforms/flip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/transforms/flip.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/transforms/limit_longest_side.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/transforms/limit_longest_side.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/transforms/zoom_in.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/transforms/zoom_in.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/inference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/inference/utils.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/initializer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/initializer.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/is_deeplab_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/is_deeplab_model.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/is_hrnet_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/is_hrnet_model.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/losses.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/metrics.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/modeling/basic_blocks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/modeling/basic_blocks.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/modeling/deeplab_v3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/modeling/deeplab_v3.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/modeling/hrnet_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/modeling/hrnet_ocr.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/modeling/ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/modeling/ocr.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/modeling/resnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/modeling/resnet.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/modeling/resnetv1b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/modeling/resnetv1b.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/ops.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/syncbn/LICENSE -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/syncbn/README.md -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/modules/functional/__init__.py: -------------------------------------------------------------------------------- 1 | from .syncbn import batchnorm2d_sync 2 | -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/modules/functional/_csrc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/syncbn/modules/functional/_csrc.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/modules/functional/csrc/bn.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/syncbn/modules/functional/csrc/bn.h -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/modules/functional/csrc/cuda/bn_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/syncbn/modules/functional/csrc/cuda/bn_cuda.cu -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/modules/functional/csrc/cuda/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/syncbn/modules/functional/csrc/cuda/common.h -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/modules/functional/csrc/cuda/ext_lib.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/syncbn/modules/functional/csrc/cuda/ext_lib.h -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/modules/functional/csrc/ext_lib.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/syncbn/modules/functional/csrc/ext_lib.cpp -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/modules/functional/syncbn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/syncbn/modules/functional/syncbn.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/modules/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .syncbn import * 2 | -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/model/syncbn/modules/nn/syncbn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/model/syncbn/modules/nn/syncbn.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/utils/cython/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/utils/cython/__init__.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/utils/cython/_get_dist_maps.pyx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/utils/cython/_get_dist_maps.pyx -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/utils/cython/_get_dist_maps.pyxbld: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/utils/cython/_get_dist_maps.pyxbld -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/utils/cython/dist_maps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/utils/cython/dist_maps.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/utils/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/utils/misc.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs/utils/vis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs/utils/vis.py -------------------------------------------------------------------------------- /XMem/inference/interact/fbrs_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/fbrs_controller.py -------------------------------------------------------------------------------- /XMem/inference/interact/gui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/gui.py -------------------------------------------------------------------------------- /XMem/inference/interact/gui_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/gui_utils.py -------------------------------------------------------------------------------- /XMem/inference/interact/interaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/interaction.py -------------------------------------------------------------------------------- /XMem/inference/interact/interactive_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/interactive_utils.py -------------------------------------------------------------------------------- /XMem/inference/interact/resource_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/resource_manager.py -------------------------------------------------------------------------------- /XMem/inference/interact/s2m/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/inference/interact/s2m/_deeplab.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/s2m/_deeplab.py -------------------------------------------------------------------------------- /XMem/inference/interact/s2m/s2m_network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/s2m/s2m_network.py -------------------------------------------------------------------------------- /XMem/inference/interact/s2m/s2m_resnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/s2m/s2m_resnet.py -------------------------------------------------------------------------------- /XMem/inference/interact/s2m/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/s2m/utils.py -------------------------------------------------------------------------------- /XMem/inference/interact/s2m_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/s2m_controller.py -------------------------------------------------------------------------------- /XMem/inference/interact/timer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/interact/timer.py -------------------------------------------------------------------------------- /XMem/inference/kv_memory_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/kv_memory_store.py -------------------------------------------------------------------------------- /XMem/inference/memory_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/inference/memory_manager.py -------------------------------------------------------------------------------- /XMem/interactive_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/interactive_demo.py -------------------------------------------------------------------------------- /XMem/merge_multi_scale.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/merge_multi_scale.py -------------------------------------------------------------------------------- /XMem/merge_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/merge_results.py -------------------------------------------------------------------------------- /XMem/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/model/aggregate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/model/aggregate.py -------------------------------------------------------------------------------- /XMem/model/cbam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/model/cbam.py -------------------------------------------------------------------------------- /XMem/model/group_modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/model/group_modules.py -------------------------------------------------------------------------------- /XMem/model/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/model/losses.py -------------------------------------------------------------------------------- /XMem/model/memory_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/model/memory_util.py -------------------------------------------------------------------------------- /XMem/model/modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/model/modules.py -------------------------------------------------------------------------------- /XMem/model/network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/model/network.py -------------------------------------------------------------------------------- /XMem/model/resnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/model/resnet.py -------------------------------------------------------------------------------- /XMem/model/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/model/trainer.py -------------------------------------------------------------------------------- /XMem/requirements.txt: -------------------------------------------------------------------------------- 1 | progressbar2 2 | gdown 3 | hickle 4 | tensorboard 5 | numpy -------------------------------------------------------------------------------- /XMem/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/scripts/download_bl30k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/scripts/download_bl30k.py -------------------------------------------------------------------------------- /XMem/scripts/download_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/scripts/download_datasets.py -------------------------------------------------------------------------------- /XMem/scripts/download_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/scripts/download_models.sh -------------------------------------------------------------------------------- /XMem/scripts/download_models_demo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/scripts/download_models_demo.sh -------------------------------------------------------------------------------- /XMem/scripts/expand_long_vid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/scripts/expand_long_vid.py -------------------------------------------------------------------------------- /XMem/scripts/resize_youtube.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/scripts/resize_youtube.py -------------------------------------------------------------------------------- /XMem/tracking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/tracking.py -------------------------------------------------------------------------------- /XMem/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/train.py -------------------------------------------------------------------------------- /XMem/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /XMem/util/configuration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/util/configuration.py -------------------------------------------------------------------------------- /XMem/util/davis_subset.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/util/davis_subset.txt -------------------------------------------------------------------------------- /XMem/util/image_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/util/image_saver.py -------------------------------------------------------------------------------- /XMem/util/load_subset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/util/load_subset.py -------------------------------------------------------------------------------- /XMem/util/log_integrator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/util/log_integrator.py -------------------------------------------------------------------------------- /XMem/util/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/util/logger.py -------------------------------------------------------------------------------- /XMem/util/palette.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/util/palette.py -------------------------------------------------------------------------------- /XMem/util/tensor_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/util/tensor_util.py -------------------------------------------------------------------------------- /XMem/util/yv_subset.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/XMem/util/yv_subset.txt -------------------------------------------------------------------------------- /assert/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/assert/architecture.png -------------------------------------------------------------------------------- /assert/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/assert/performance.png -------------------------------------------------------------------------------- /merge_lora_weights_and_save_hf_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/merge_lora_weights_and_save_hf_model.py -------------------------------------------------------------------------------- /model/VISA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/VISA.py -------------------------------------------------------------------------------- /model/llava/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import LlavaLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /model/llava/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/constants.py -------------------------------------------------------------------------------- /model/llava/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/conversation.py -------------------------------------------------------------------------------- /model/llava/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/mm_utils.py -------------------------------------------------------------------------------- /model/llava/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/__init__.py -------------------------------------------------------------------------------- /model/llava/model/apply_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/apply_delta.py -------------------------------------------------------------------------------- /model/llava/model/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/builder.py -------------------------------------------------------------------------------- /model/llava/model/consolidate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/consolidate.py -------------------------------------------------------------------------------- /model/llava/model/language_model/llava_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/language_model/llava_llama.py -------------------------------------------------------------------------------- /model/llava/model/language_model/llava_mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/language_model/llava_mpt.py -------------------------------------------------------------------------------- /model/llava/model/language_model/mpt/adapt_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/language_model/mpt/adapt_tokenizer.py -------------------------------------------------------------------------------- /model/llava/model/language_model/mpt/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/language_model/mpt/attention.py -------------------------------------------------------------------------------- /model/llava/model/language_model/mpt/blocks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/language_model/mpt/blocks.py -------------------------------------------------------------------------------- /model/llava/model/language_model/mpt/configuration_mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/language_model/mpt/configuration_mpt.py -------------------------------------------------------------------------------- /model/llava/model/language_model/mpt/custom_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/language_model/mpt/custom_embedding.py -------------------------------------------------------------------------------- /model/llava/model/language_model/mpt/flash_attn_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/language_model/mpt/flash_attn_triton.py -------------------------------------------------------------------------------- /model/llava/model/language_model/mpt/hf_prefixlm_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/language_model/mpt/hf_prefixlm_converter.py -------------------------------------------------------------------------------- /model/llava/model/language_model/mpt/meta_init_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/language_model/mpt/meta_init_context.py -------------------------------------------------------------------------------- /model/llava/model/language_model/mpt/modeling_mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/language_model/mpt/modeling_mpt.py -------------------------------------------------------------------------------- /model/llava/model/language_model/mpt/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/language_model/mpt/norm.py -------------------------------------------------------------------------------- /model/llava/model/language_model/mpt/param_init_fns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/language_model/mpt/param_init_fns.py -------------------------------------------------------------------------------- /model/llava/model/llava_arch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/llava_arch.py -------------------------------------------------------------------------------- /model/llava/model/make_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/make_delta.py -------------------------------------------------------------------------------- /model/llava/model/multimodal_encoder/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/multimodal_encoder/builder.py -------------------------------------------------------------------------------- /model/llava/model/multimodal_encoder/clip_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/multimodal_encoder/clip_encoder.py -------------------------------------------------------------------------------- /model/llava/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/model/utils.py -------------------------------------------------------------------------------- /model/llava/train/llama_flash_attn_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/train/llama_flash_attn_monkey_patch.py -------------------------------------------------------------------------------- /model/llava/train/llava_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/train/llava_trainer.py -------------------------------------------------------------------------------- /model/llava/train/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/train/train.py -------------------------------------------------------------------------------- /model/llava/train/train_mem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/train/train_mem.py -------------------------------------------------------------------------------- /model/llava/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/llava/utils.py -------------------------------------------------------------------------------- /model/segment_anything/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/__init__.py -------------------------------------------------------------------------------- /model/segment_anything/automatic_mask_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/automatic_mask_generator.py -------------------------------------------------------------------------------- /model/segment_anything/build_sam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/build_sam.py -------------------------------------------------------------------------------- /model/segment_anything/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/modeling/__init__.py -------------------------------------------------------------------------------- /model/segment_anything/modeling/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/modeling/common.py -------------------------------------------------------------------------------- /model/segment_anything/modeling/image_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/modeling/image_encoder.py -------------------------------------------------------------------------------- /model/segment_anything/modeling/mask_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/modeling/mask_decoder.py -------------------------------------------------------------------------------- /model/segment_anything/modeling/prompt_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/modeling/prompt_encoder.py -------------------------------------------------------------------------------- /model/segment_anything/modeling/sam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/modeling/sam.py -------------------------------------------------------------------------------- /model/segment_anything/modeling/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/modeling/transformer.py -------------------------------------------------------------------------------- /model/segment_anything/predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/predictor.py -------------------------------------------------------------------------------- /model/segment_anything/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/utils/__init__.py -------------------------------------------------------------------------------- /model/segment_anything/utils/amg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/utils/amg.py -------------------------------------------------------------------------------- /model/segment_anything/utils/onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/utils/onnx.py -------------------------------------------------------------------------------- /model/segment_anything/utils/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/segment_anything/utils/transforms.py -------------------------------------------------------------------------------- /model/tf/modeling_outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/tf/modeling_outputs.py -------------------------------------------------------------------------------- /model/univi/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import ChatUniViLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /model/univi/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/config/__init__.py -------------------------------------------------------------------------------- /model/univi/config/dataset_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/config/dataset_config.py -------------------------------------------------------------------------------- /model/univi/config/model_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/config/model_config.py -------------------------------------------------------------------------------- /model/univi/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/constants.py -------------------------------------------------------------------------------- /model/univi/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/conversation.py -------------------------------------------------------------------------------- /model/univi/demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/demo.py -------------------------------------------------------------------------------- /model/univi/eval/evaluate/evaluate_benchmark_1_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/evaluate/evaluate_benchmark_1_correctness.py -------------------------------------------------------------------------------- /model/univi/eval/evaluate/evaluate_benchmark_2_detailed_orientation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/evaluate/evaluate_benchmark_2_detailed_orientation.py -------------------------------------------------------------------------------- /model/univi/eval/evaluate/evaluate_benchmark_3_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/evaluate/evaluate_benchmark_3_context.py -------------------------------------------------------------------------------- /model/univi/eval/evaluate/evaluate_benchmark_4_temporal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/evaluate/evaluate_benchmark_4_temporal.py -------------------------------------------------------------------------------- /model/univi/eval/evaluate/evaluate_benchmark_5_consistency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/evaluate/evaluate_benchmark_5_consistency.py -------------------------------------------------------------------------------- /model/univi/eval/evaluate/evaluate_gpt_review_visual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/evaluate/evaluate_gpt_review_visual.py -------------------------------------------------------------------------------- /model/univi/eval/evaluate/evaluate_science_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/evaluate/evaluate_science_qa.py -------------------------------------------------------------------------------- /model/univi/eval/evaluate/evaluate_video_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/evaluate/evaluate_video_qa.py -------------------------------------------------------------------------------- /model/univi/eval/evaluate/summarize_gpt_review.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/evaluate/summarize_gpt_review.py -------------------------------------------------------------------------------- /model/univi/eval/model_coco_vqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/model_coco_vqa.py -------------------------------------------------------------------------------- /model/univi/eval/model_video_consistency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/model_video_consistency.py -------------------------------------------------------------------------------- /model/univi/eval/model_video_general.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/model_video_general.py -------------------------------------------------------------------------------- /model/univi/eval/model_video_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/model_video_qa.py -------------------------------------------------------------------------------- /model/univi/eval/model_vqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/model_vqa.py -------------------------------------------------------------------------------- /model/univi/eval/model_vqa_scienceqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/model_vqa_scienceqa.py -------------------------------------------------------------------------------- /model/univi/eval/questions/coco2014_val_qa_eval/qa90_gpt4_answer.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/coco2014_val_qa_eval/qa90_gpt4_answer.jsonl -------------------------------------------------------------------------------- /model/univi/eval/questions/coco2014_val_qa_eval/qa90_questions.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/coco2014_val_qa_eval/qa90_questions.jsonl -------------------------------------------------------------------------------- /model/univi/eval/questions/coco_pope/coco_pope_adversarial.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/coco_pope/coco_pope_adversarial.jsonl -------------------------------------------------------------------------------- /model/univi/eval/questions/coco_pope/coco_pope_popular.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/coco_pope/coco_pope_popular.jsonl -------------------------------------------------------------------------------- /model/univi/eval/questions/coco_pope/coco_pope_random.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/coco_pope/coco_pope_random.jsonl -------------------------------------------------------------------------------- /model/univi/eval/questions/scienceqa/pid_splits.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/scienceqa/pid_splits.json -------------------------------------------------------------------------------- /model/univi/eval/questions/scienceqa/problems.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/scienceqa/problems.json -------------------------------------------------------------------------------- /model/univi/eval/questions/scienceqa/test_QCM-LEA.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/scienceqa/test_QCM-LEA.json -------------------------------------------------------------------------------- /model/univi/eval/questions/video_qa/activitynet_a_list.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/video_qa/activitynet_a_list.json -------------------------------------------------------------------------------- /model/univi/eval/questions/video_qa/activitynet_qa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/video_qa/activitynet_qa.json -------------------------------------------------------------------------------- /model/univi/eval/questions/video_qa/consistency_qa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/video_qa/consistency_qa.json -------------------------------------------------------------------------------- /model/univi/eval/questions/video_qa/generic_qa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/video_qa/generic_qa.json -------------------------------------------------------------------------------- /model/univi/eval/questions/video_qa/msrvtt_a_list.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/video_qa/msrvtt_a_list.json -------------------------------------------------------------------------------- /model/univi/eval/questions/video_qa/msrvtt_qa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/video_qa/msrvtt_qa.json -------------------------------------------------------------------------------- /model/univi/eval/questions/video_qa/msvd_a_list.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/video_qa/msvd_a_list.json -------------------------------------------------------------------------------- /model/univi/eval/questions/video_qa/msvd_qa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/video_qa/msvd_qa.json -------------------------------------------------------------------------------- /model/univi/eval/questions/video_qa/temporal_qa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/video_qa/temporal_qa.json -------------------------------------------------------------------------------- /model/univi/eval/questions/video_qa/tgif_a_list.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/video_qa/tgif_a_list.json -------------------------------------------------------------------------------- /model/univi/eval/questions/video_qa/tgif_qa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/questions/video_qa/tgif_qa.json -------------------------------------------------------------------------------- /model/univi/eval/table/caps_boxes_coco2014_val_80.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/table/caps_boxes_coco2014_val_80.jsonl -------------------------------------------------------------------------------- /model/univi/eval/table/model.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/table/model.jsonl -------------------------------------------------------------------------------- /model/univi/eval/table/question.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/table/question.jsonl -------------------------------------------------------------------------------- /model/univi/eval/table/reviewer.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/table/reviewer.jsonl -------------------------------------------------------------------------------- /model/univi/eval/table/rule.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/eval/table/rule.json -------------------------------------------------------------------------------- /model/univi/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/mm_utils.py -------------------------------------------------------------------------------- /model/univi/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/__init__.py -------------------------------------------------------------------------------- /model/univi/model/apply_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/apply_delta.py -------------------------------------------------------------------------------- /model/univi/model/arch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/arch.py -------------------------------------------------------------------------------- /model/univi/model/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/builder.py -------------------------------------------------------------------------------- /model/univi/model/cluster.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/cluster.py -------------------------------------------------------------------------------- /model/univi/model/consolidate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/consolidate.py -------------------------------------------------------------------------------- /model/univi/model/dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/dataloader.py -------------------------------------------------------------------------------- /model/univi/model/language_model/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/language_model/llama.py -------------------------------------------------------------------------------- /model/univi/model/make_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/make_delta.py -------------------------------------------------------------------------------- /model/univi/model/multimodal_encoder/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/multimodal_encoder/builder.py -------------------------------------------------------------------------------- /model/univi/model/multimodal_encoder/clip_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/multimodal_encoder/clip_encoder.py -------------------------------------------------------------------------------- /model/univi/model/multimodal_encoder/eva_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/multimodal_encoder/eva_encoder.py -------------------------------------------------------------------------------- /model/univi/model/multimodal_encoder/eva_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/multimodal_encoder/eva_vit.py -------------------------------------------------------------------------------- /model/univi/model/multimodal_encoder/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/multimodal_encoder/processor.py -------------------------------------------------------------------------------- /model/univi/model/multimodal_encoder/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/model/multimodal_encoder/utils.py -------------------------------------------------------------------------------- /model/univi/train/llama_flash_attn_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/train/llama_flash_attn_monkey_patch.py -------------------------------------------------------------------------------- /model/univi/train/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/train/train.py -------------------------------------------------------------------------------- /model/univi/train/train_mem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/train/train_mem.py -------------------------------------------------------------------------------- /model/univi/train/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/train/trainer.py -------------------------------------------------------------------------------- /model/univi/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/model/univi/utils.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/train_13b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/scripts/train_13b.sh -------------------------------------------------------------------------------- /scripts/train_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/scripts/train_7b.sh -------------------------------------------------------------------------------- /scripts/val_7b_video.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/scripts/val_7b_video.sh -------------------------------------------------------------------------------- /tools/eval_davis17.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/tools/eval_davis17.py -------------------------------------------------------------------------------- /tools/eval_mevis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/tools/eval_mevis.py -------------------------------------------------------------------------------- /tools/eval_revos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/tools/eval_revos.py -------------------------------------------------------------------------------- /tools/generate_foreground_mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/tools/generate_foreground_mask.py -------------------------------------------------------------------------------- /tools/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/tools/metrics.py -------------------------------------------------------------------------------- /tools/zip_mp_mevis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/tools/zip_mp_mevis.py -------------------------------------------------------------------------------- /tools/zip_mp_refytvos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/tools/zip_mp_refytvos.py -------------------------------------------------------------------------------- /train_ds.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/train_ds.py -------------------------------------------------------------------------------- /utils/ade20k_classes.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/ade20k_classes.json -------------------------------------------------------------------------------- /utils/chatunivi_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/chatunivi_dataset.py -------------------------------------------------------------------------------- /utils/cocostuff_classes.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/cocostuff_classes.txt -------------------------------------------------------------------------------- /utils/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/conversation.py -------------------------------------------------------------------------------- /utils/d2_datasets/categories.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/d2_datasets/categories.py -------------------------------------------------------------------------------- /utils/d2_datasets/mevis_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/d2_datasets/mevis_utils.py -------------------------------------------------------------------------------- /utils/d2_datasets/refytvos_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/d2_datasets/refytvos_utils.py -------------------------------------------------------------------------------- /utils/d2_datasets/refytvos_val_videos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/d2_datasets/refytvos_val_videos.py -------------------------------------------------------------------------------- /utils/d2_datasets/ytvis_api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/d2_datasets/ytvis_api/ytvos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/d2_datasets/ytvis_api/ytvos.py -------------------------------------------------------------------------------- /utils/d2_datasets/ytvis_api/ytvoseval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/d2_datasets/ytvis_api/ytvoseval.py -------------------------------------------------------------------------------- /utils/data_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/data_processing.py -------------------------------------------------------------------------------- /utils/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/dataset.py -------------------------------------------------------------------------------- /utils/dataset_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/dataset_config.py -------------------------------------------------------------------------------- /utils/grefcoco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/grefcoco.py -------------------------------------------------------------------------------- /utils/grefer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/grefer.py -------------------------------------------------------------------------------- /utils/random_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/random_list.py -------------------------------------------------------------------------------- /utils/reason_seg_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/reason_seg_dataset.py -------------------------------------------------------------------------------- /utils/refer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/refer.py -------------------------------------------------------------------------------- /utils/refer_seg_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/refer_seg_dataset.py -------------------------------------------------------------------------------- /utils/rvos_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/rvos_dataset.py -------------------------------------------------------------------------------- /utils/rvos_eval_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/rvos_eval_dataset.py -------------------------------------------------------------------------------- /utils/sem_seg_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/sem_seg_dataset.py -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/utils.py -------------------------------------------------------------------------------- /utils/vqa_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils/vqa_dataset.py -------------------------------------------------------------------------------- /utils_llamavid/llamavid_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils_llamavid/llamavid_client.py -------------------------------------------------------------------------------- /utils_llamavid/llamavid_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cilinyan/VISA/HEAD/utils_llamavid/llamavid_server.py --------------------------------------------------------------------------------